#loading the necessary packages
library(tidyverse)
Registered S3 methods overwritten by 'dbplyr':
method from
print.tbl_lazy
print.tbl_sql
── Attaching packages ──────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────── tidyverse 1.3.1 ──
✔ ggplot2 3.3.6 ✔ purrr 0.3.4
✔ tibble 3.1.7 ✔ dplyr 1.0.9
✔ tidyr 1.2.0 ✔ stringr 1.4.0
✔ readr 2.1.2 ✔ forcats 0.5.1
Warning: package ‘ggplot2’ was built under R version 4.2.1Warning: package ‘dplyr’ was built under R version 4.2.1── Conflicts ─────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────── tidyverse_conflicts() ──
✖ dplyr::filter() masks stats::filter()
✖ dplyr::lag() masks stats::lag()
library(janitor)
Warning: package ‘janitor’ was built under R version 4.2.1
Attaching package: ‘janitor’
The following objects are masked from ‘package:stats’:
chisq.test, fisher.test
library(readxl)
Warning: package ‘readxl’ was built under R version 4.2.1
library(here)
Warning: package ‘here’ was built under R version 4.2.1here() starts at C:/Users/nico-/OneDrive/Desktop/Codeclan/dirty_data_project/dirty_data_project/task_4/dirty_data_task_4
#loading the data
# Read each year's raw Boing Boing candy survey workbook and tidy its column
# names with clean_names(). The paths are resolved through here(), relative to
# the project root it reports.
candy_2015 <- here("../raw_data/boing-boing-candy-2015.xlsx") %>%
  read_excel() %>%
  clean_names()

candy_2016 <- here("../raw_data/boing-boing-candy-2016.xlsx") %>%
  read_excel() %>%
  clean_names()

candy_2017 <- here("../raw_data/boing-boing-candy-2017.xlsx") %>%
  read_excel() %>%
  clean_names()
New names:
candy_2016
candy_2015
candy_2017
# Order of operations:
#   1. Decide which variables to keep for the analysis, according to the business questions.
#   2. Pivot the data the same way for all 3 datasets.
#   3. Perform the pivot on all three.
#   4. Investigate the other columns.
# Keeping the variables useful for analysis from the 2015 dataset.
# The variables are a bit confusing: I will keep only the variables that are actual food and drop anything that is not food.
# Trim the 2015 survey down to the columns wanted for the analysis by removing
# the listed columns; everything not named here is kept, in its original order.
candy_2015 <- select(
  candy_2015,
  -c(
    # Individual items that are being excluded.
    cash_or_other_forms_of_legal_tender,
    creepy_religious_comics_chick_tracts,
    dental_paraphenalia,
    generic_brand_acetaminophen,
    hugs_actual_physical_hugs,
    peterson_brand_sidewalk_chalk,
    # A contiguous run of columns, from the free-text remarks question through
    # the last "degrees of separation" question (both ends inclusive).
    please_leave_any_remarks_or_comments_regarding_your_choices:
      please_estimate_the_degrees_of_separation_you_have_from_the_following_folks_beyonce_knowles
  )
)

# Print the trimmed data for a visual check.
candy_2015
#time to rename some columns. Maybe not that necessary but I would like to have my final results with neat names.
# Give the long-winded 2015 column names shorter labels (new_name = old_name).
# Only the names change; the column contents and order are untouched.
candy_2015 <- candy_2015 %>%
  rename(
    trick_or_treat           = are_you_going_actually_going_trick_or_treating_yourself,
    grand_bar                = x100_grand_bar,
    brown_globs              = anonymous_brown_globs_that_come_in_black_and_orange_wrappers,
    any_candy_bar            = any_full_sized_candy_bar,
    brach_without_candy_corn = brach_products_not_including_candy_corn,
    high_fructose_corn_syrup = vials_of_pure_high_fructose_corn_syrup_for_main_lining_into_your_vein,
    restaurant_candy         = candy_that_is_clearly_just_the_stuff_given_out_for_free_at_restaurants,
    chick_o_sticks           = chick_o_sticks_we_don_t_know_what_that_is,
    old_marshmallow          = those_odd_marshmallow_circus_peanut_things
  )

# Print the renamed data for a visual check.
candy_2015
#same thing for the 2016 dataset
#selecting only the necessary columns for final analysis
# Trim the 2016 survey by removing the listed columns; everything not named
# here is kept, in its original order.
candy_2016 <- select(
  candy_2016,
  -c(
    # Individual items that are being excluded.
    cash_or_other_forms_of_legal_tender,
    creepy_religious_comics_chick_tracts,
    dental_paraphenalia,
    generic_brand_acetaminophen,
    hugs_actual_physical_hugs,
    person_of_interest_season_3_dvd_box_set_not_including_disc_4_with_hilarious_outtakes,
    # NOTE(review): by its name this looks like a candy item rather than a
    # non-food one - confirm that dropping it is intended.
    sourpatch_kids_i_e_abominations_of_nature,
    # NOTE(review): the 2015 cleaning keeps this column (renamed to
    # high_fructose_corn_syrup) - confirm the difference between years is
    # intended.
    vials_of_pure_high_fructose_corn_syrup_for_main_lining_into_your_vein,
    # A contiguous run of columns, from the "items that give you joy" question
    # through york_peppermint_patties_ignore (both ends inclusive).
    please_list_any_items_not_included_above_that_give_you_joy:
      york_peppermint_patties_ignore
  )
)

# Print the trimmed data for a visual check.
candy_2016
# Give the 2016 respondent and candy columns shorter labels
# (new_name = old_name). Only the names change; contents and order are kept.
candy_2016 <- candy_2016 %>%
  rename(
    trick_or_treat = are_you_going_actually_going_trick_or_treating_yourself,
    gender         = your_gender,
    age            = how_old_are_you,
    country        = which_country_do_you_live_in,
    state_province = which_state_province_county_do_you_live_in,
    grand_bar      = x100_grand_bar,
    brown_globs    = anonymous_brown_globs_that_come_in_black_and_orange_wrappers
  )

# Print the renamed data for a visual check.
candy_2016
#sorting the country column